Project Part 2

Group 9

Authors
Affiliation

Thimoté Dupuch

University of Twente

Joris van Lierop

University of Twente

Jurre van Sijpveld

University of Twente

Published

September 23, 2024

Loading libraries

library(dplyr)
library(forcats)
library(ggplot2)
library(plotly)
library(broom)
library(boot)
library(caret)
library(pROC)

Loading dataset

# Read the semicolon-separated file "SMARTc.csv" into a data frame.
# Per the original note, this version of the data has no missing values.
dataset <- read.csv(file = "SMARTc.csv", sep = ";")

Re-encode the categorical variables

# Turn the numerically coded categorical columns into labelled factors.
# Columns that share a coding scheme are recoded together with across();
# every column keeps its name and its position in the data frame.
dataset <- dataset %>%
    mutate(
        # Binary indicators coded 0 / 1.
        across(
            c(EVENT, DIABETES, CEREBRAL, CARDIAC, AAA, PERIPH, STENOSIS),
            function(col) fct_recode(factor(col), "no" = "0", "yes" = "1")
        ),
        # Three-level habit variables coded 1 / 2 / 3.
        across(
            c(SMOKING, alcohol),
            function(col) {
                fct_recode(
                    factor(col),
                    "never" = "1", "former" = "2", "current" = "3"
                )
            }
        ),
        # Columns with their own coding scheme.
        SEX = fct_recode(factor(SEX), "male" = "1", "female" = "2"),
        albumin = fct_recode(
            factor(albumin),
            "no" = "1", "micro" = "2", "macro" = "3"
        )
    )

Logistic regression model of EVENT

Assessing performance using cross-validation

# k-fold cross-validation of the logistic regression model for EVENT.
# For every fold the model is fitted on the remaining folds, used to predict
# the held-out fold, and the resulting ROC curve is stored in roc_list.

# Fix the RNG seed so the random fold assignment made by createFolds(), and
# therefore every ROC curve below, is identical each time the report is rendered.
set.seed(2024)

k <- 5
folds <- createFolds(dataset$EVENT, k = k, list = TRUE, returnTrain = FALSE)

# Preallocate one slot per fold instead of growing the list inside the loop.
roc_list <- vector("list", length(folds))

# A plain for loop (rather than lapply) is kept on purpose: train, test,
# fit_train, predict_test and roc_i of the last fold stay available in the
# calling environment, exactly as before.
for (i in seq_along(folds)) {
    # folds[[i]] holds the row indices of the held-out observations.
    train <- dataset[-folds[[i]], ]
    test <- dataset[folds[[i]], ]

    fit_train <- glm(EVENT ~ AGE + SEX + BMI + SYSTH + HDL + DIABETES +
        HISTCAR2 + HOMOC + log(CREAT) + STENOSIS + IMT + SMOKING +
        alcohol + albumin, data = train, family = "binomial")

    # type = "response" returns the predicted probability of the second
    # factor level of EVENT (glm treats the first level as "failure").
    predict_test <- predict(fit_train, newdata = test, type = "response")

    # Set control/case levels and the comparison direction explicitly.
    # With the default direction = "auto", pROC picks the direction separately
    # for each fold, which can flip a poorly performing fold and biases the
    # cross-validated AUC upwards.
    roc_i <- roc(
        response = test$EVENT,
        predictor = predict_test,
        levels = levels(dataset$EVENT), # c(control, case)
        direction = "<" # controls have lower predicted risk than cases
    )
    roc_list[[i]] <- roc_i
}
Display code to plot the ROC curves
# Stack the ROC coordinates of all folds into one long data frame with one row
# per threshold and fold.
roc_df <- do.call(rbind, lapply(seq_along(roc_list), function(i) {
    data.frame(
        Fold = paste("Fold", i),
        Sensitivity = roc_list[[i]]$sensitivities,
        # NOTE: despite its name, this column stores 1 - specificity (the
        # false-positive rate). The name is kept so that any code relying on
        # roc_df keeps working; the axis label below states the real quantity.
        Specificity = 1 - roc_list[[i]]$specificities
    )
}))

# One ROC curve per fold, drawn as an interactive plot.
roc_plot <- ggplot(roc_df, aes(x = Specificity, y = Sensitivity, color = Fold)) +
    # geom_path() connects the points in row order, i.e. in the threshold
    # order produced by pROC. geom_line() would re-sort the points by x only;
    # points sharing the same x (the vertical steps of a ROC curve) would then
    # be joined in the wrong order, drawing diagonal saw-teeth instead of steps.
    geom_path(linewidth = 0.8) +
    scale_color_brewer(palette = "Set1") +
    theme_minimal(base_size = 14) +
    labs(
        title = "ROC Curves for Each Fold",
        x = "1 - Specificity",
        y = "Sensitivity",
        color = "Fold"
    ) +
    theme(
        plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
        legend.position = "bottom"
    ) +
    # Both axes span [0, 1]; equal scaling keeps the plot square.
    coord_equal()

ggplotly(roc_plot)